﻿using DotNetCommon.Extensions;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace DotNetCommon
{
    /// <summary>
    /// 中文乱码帮助类(GBK or UTF-8)<para></para>
    /// 检测的原理: 将给定的字符串转成unicode数字，如果在常见汉字和符号内则不乱码，否则判定为乱码<para></para>
    /// 当读取可能乱码的字符串时，需要先使用UTF-8读取，检查读取的结果有乱码的时候再用GBK读取，如果首先用GBK读取，那么不太能检查的出来。<para></para>
    /// </summary>
    public class LuanMaHelper
    {
        // Private gate for the one-time registration below. A dedicated object is used instead of
        // lock(typeof(LuanMaHelper)) because a Type instance is reachable by any other code, which
        // could take the same lock and cause contention or deadlocks.
        private static readonly object _gbkInitGate = new object();
        // volatile: the flag is read outside the lock on the fast path.
        private static volatile bool _isInitGBK = false;
        /// <summary>
        /// Registers <see cref="System.Text.CodePagesEncodingProvider"/> so that code-page encodings
        /// such as GBK can be resolved through <see cref="System.Text.Encoding.GetEncoding(string)"/>.
        /// The registration is performed at most once; later calls return immediately.
        /// </summary>
        public static void InitGBK()
        {
            if (_isInitGBK)
            {
                return;
            }
            lock (_gbkInitGate)
            {
                // Re-check under the lock: another thread may have finished the registration
                // while this one was waiting.
                if (!_isInitGBK)
                {
                    System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
                    _isInitGBK = true;
                }
            }
        }

        /// <summary>
        /// Type initializer: makes sure the code-page encoding provider is registered
        /// before any member of this class is used.
        /// </summary>
        static LuanMaHelper() => InitGBK();

        // Shared encoding instances used by the helpers in this class.
        private static Encoding unicode = Encoding.Unicode;
        private static Encoding utf8 = Encoding.UTF8;

        // The GBK encoding is resolved on first use; the factory registers the
        // code-page provider first so the lookup by name can succeed.
        private static Lazy<Encoding> _gbk = new Lazy<Encoding>(CreateGbkEncoding);

        private static Encoding gbk
        {
            get { return _gbk.Value; }
        }

        // Factory for the lazily created GBK encoding.
        private static Encoding CreateGbkEncoding()
        {
            InitGBK();
            return Encoding.GetEncoding("GBK");
        }

        /// <summary>
        /// Determines whether a UTF-16 code unit is one of the characters this helper treats as "normal".
        /// Any other character is taken as evidence of garbled (mis-decoded) text.<br />
        /// Accepted ranges (all bounds inclusive): U+0000-U+00FF, Number Forms (U+2150-U+218F),
        /// Mathematical Operators (U+2200-U+22FF), Enclosed Alphanumerics (U+2460-U+24FF),
        /// CJK Symbols and Punctuation (U+3000-U+303F), CJK Unified Ideographs (U+4E00-U+9FFF) and
        /// Halfwidth and Fullwidth Forms (U+FF00-U+FFEF).<br />
        /// Surrogate code units fall outside every accepted range, so emoji are reported as garbled.<br />
        /// <seealso href="https://blog.csdn.net/hherima/article/details/9045861"/>
        /// </summary>
        /// <param name="c">The UTF-16 code unit to classify.</param>
        /// <returns><c>true</c> if <paramref name="c"/> lies in one of the accepted ranges; otherwise <c>false</c>.</returns>
        public static bool IsNormalChar(char c)
        {
            // The char value already is the UTF-16 code unit, so it is compared directly;
            // no round trip through a byte array is needed.
            return c <= 0x00FF                      // NUL, ASCII and Latin-1: never treated as garbled
                || (c >= 0x2150 && c <= 0x218F)     // 2150-218F: Number Forms
                || (c >= 0x2200 && c <= 0x22FF)     // 2200-22FF: Mathematical Operators
                || (c >= 0x2460 && c <= 0x24FF)     // 2460-24FF: Enclosed Alphanumerics
                || (c >= 0x3000 && c <= 0x303F)     // 3000-303F: CJK Symbols and Punctuation
                || (c >= 0x4E00 && c <= 0x9FFF)     // 4E00-9FFF: CJK Unified Ideographs
                || (c >= 0xFF00 && c <= 0xFFEF);    // FF00-FFEF: Halfwidth and Fullwidth Forms
        }

        /// <summary>
        /// Returns the numeric value of a UTF-16 code unit.
        /// </summary>
        /// <param name="c">The character whose code-unit value is requested.</param>
        /// <returns>The value of <paramref name="c"/> as an integer in the range 0-0xFFFF.</returns>
        public static int GetUnicode(char c)
        {
            // A char converts implicitly to its code-unit value. Encoding it to bytes first would
            // replace a lone surrogate with U+FFFD and report the wrong number.
            return c;
        }

        /// <summary>
        /// Returns the hexadecimal representation of a UTF-16 code unit, prefixed with "0x".
        /// The two bytes are passed to <c>BytesToHexConverter.ToHex</c> high byte first, so the
        /// digits read in natural order (for example U+FFFD yields the digits FFFD).
        /// </summary>
        /// <param name="c">The character to format.</param>
        /// <returns>"0x" followed by the hex digits produced by <c>BytesToHexConverter.ToHex</c>.</returns>
        public static string GetUnicodeHex(char c)
        {
            // Build the big-endian byte pair straight from the code unit. This is independent of
            // the host byte order and keeps a lone surrogate's own value instead of U+FFFD.
            var bs = new[] { (byte)(c >> 8), (byte)(c & 0xFF) };
            return "0x" + BytesToHexConverter.ToHex(bs);
        }

        /// <summary>
        /// Tells whether a string contains garbled text, i.e. at least one character that
        /// <see cref="IsNormalChar(char)"/> rejects.
        /// </summary>
        /// <param name="str">The text to inspect.</param>
        /// <returns>
        /// <c>false</c> for null, empty or whitespace-only input and for text made up only of
        /// normal characters; <c>true</c> as soon as one abnormal character is found.
        /// </returns>
        public static bool IsLuanMa(string str)
        {
            return !string.IsNullOrWhiteSpace(str) && str.Any(ch => !IsNormalChar(ch));
        }

        /// <summary>
        /// Detects garbled text and reports where the first garbled character is. Example:
        /// <code>
        /// var res = LuanMaHelper.Detect("�");//Unicode: 0xFFFD
        /// res.Position.ShouldBe("第 1 行, 1 列出现乱码,乱码字符: [�],乱码判定规则: [0xFFFD] 不在常用Unicode范围内!");
        /// </code>
        /// Lines are separated by "\r\n", "\r" or "\n"; a "\r\n" pair counts as a single line break.
        /// </summary>
        /// <param name="str">The text to inspect.</param>
        /// <returns>
        /// A result with <c>IsLuanMa == false</c> when the input is blank or contains only normal characters.
        /// Otherwise a result describing the first abnormal character, with 1-based row and column.
        /// </returns>
        public static LuanMaDetectResult Detect(string str)
        {
            if (str.IsNullOrEmptyOrWhiteSpace())
            {
                return new LuanMaDetectResult { IsLuanMa = false };
            }

            // "\r\n" is listed first so a Windows line ending is consumed as ONE separator.
            // Splitting on the two characters individually would insert an empty pseudo-line
            // for every CRLF and inflate the reported row number.
            var lines = str.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.None);
            for (var row = 0; row < lines.Length; row++)
            {
                var line = lines[row];
                for (var col = 0; col < line.Length; col++)
                {
                    var ch = line[col];
                    if (IsNormalChar(ch))
                    {
                        continue;
                    }
                    return new LuanMaDetectResult
                    {
                        IsLuanMa = true,
                        Position = $"第 {row + 1} 行, {col + 1} 列出现乱码,乱码字符: [{ch}],乱码判定规则: [{GetUnicodeHex(ch)}] 不在常用Unicode范围内!",
                        Row = row + 1,
                        Col = col + 1,
                        Character = ch
                    };
                }
            }
            return new LuanMaDetectResult { IsLuanMa = false };
        }

        /// <summary>
        /// Decodes a byte array to a string, choosing between UTF-8 and GBK automatically.
        /// Input that starts with the UTF-8 byte order mark (EF BB BF) is decoded as UTF-8 with the
        /// mark removed. Otherwise the bytes are decoded as UTF-8 first and, if the result looks
        /// garbled according to <see cref="IsLuanMa(string)"/>, decoded again as GBK.
        /// </summary>
        /// <param name="bs">The raw bytes to decode.</param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bs"/> is null.</exception>
        public static string GetString(byte[] bs)
        {
            // A BOM marks the data as UTF-8. Without this check the BOM decodes to U+FEFF, which
            // is not a "normal" character, so valid UTF-8 would wrongly be re-decoded as GBK.
            if (bs != null && bs.Length >= 3 && bs[0] == 0xEF && bs[1] == 0xBB && bs[2] == 0xBF)
            {
                return utf8.GetString(bs, 3, bs.Length - 3);
            }
            var str = utf8.GetString(bs);
            if (IsLuanMa(str))
            {
                str = gbk.GetString(bs);
            }
            return str;
        }

        /// <summary>
        /// Reads a whole file as text, choosing between UTF-8 and GBK automatically.
        /// A file that starts with the UTF-8 byte order mark (EF BB BF) is decoded as UTF-8 with the
        /// mark removed; any other content is handed to <see cref="GetString(byte[])"/>.
        /// </summary>
        /// <param name="filePath">Path of the file to read.</param>
        /// <returns>
        /// The decoded file content, or <see cref="string.Empty"/> when the path is blank,
        /// the file does not exist, or the file has no content.
        /// </returns>
        public static string ReadAllText(string filePath)
        {
            if (filePath.IsNullOrEmptyOrWhiteSpace() || !File.Exists(filePath))
            {
                return string.Empty;
            }

            var bs = File.ReadAllBytes(filePath);
            if (bs.IsNullOrEmpty())
            {
                return string.Empty;
            }

            var hasUtf8Bom = bs.Length >= 3 && bs[0] == 0xEF && bs[1] == 0xBB && bs[2] == 0xBF;
            if (hasUtf8Bom)
            {
                // Decode in place from offset 3 instead of copying the whole file just to drop the BOM.
                return Encoding.UTF8.GetString(bs, 3, bs.Length - 3);
            }
            return GetString(bs);
        }

        /// <summary>
        /// Result of a garbled-text detection, including the location of the first garbled character.
        /// </summary>
        public class LuanMaDetectResult
        {
            /// <summary>
            /// Whether garbled text was found.
            /// </summary>
            public bool IsLuanMa { get; set; }
            /// <summary>
            /// Human-readable description of where the first garbled character is.
            /// </summary>
            public string Position { get; set; }
            /// <summary>
            /// Row number of the first garbled character (starting at 1).
            /// </summary>
            public int Row { get; set; }
            /// <summary>
            /// Column number of the first garbled character (starting at 1).
            /// </summary>
            public int Col { get; set; }
            /// <summary>
            /// The first garbled character.
            /// </summary>
            public char Character { get; set; }

        }
    }
}
